package com.trytech.mongoocrawler.server.parser.jd;

import com.trytech.mongoocrawler.server.common.queue.UrlFetcherEventProducer;
import com.trytech.mongoocrawler.server.parser.HtmlParser;
import com.trytech.mongoocrawler.server.transport.http.UrlResult;
import com.trytech.mongoocrawler.server.transport.http.WebResult;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.nio.charset.Charset;
import java.util.Iterator;

/**
 * Parses a JD book category page and queues every category link found in it
 * for further crawling with a {@link JDBookListParser}.
 *
 * <p>The links are looked up along this path in the page body: the element
 * with id {@code p-category}, its first descendant with class {@code menu},
 * each descendant with class {@code item}, the first descendant of that item
 * with class {@code ext}, and finally every {@code <a>} tag inside it.
 *
 * <p>Created by coliza on 2017/4/15.
 */
public class JDBookCate2Parser extends HtmlParser<Boolean>{
    /** Scheme prepended to links that do not already start with one. */
    private static final String DEFAULT_SCHEME = "https:";

    /**
     * Extracts the category links from the page and sends one
     * {@link UrlResult} per non-empty {@code href} to {@code urlProducer}.
     *
     * @param webResult   fetched page; its data is read as the HTML text
     * @param urlProducer receiver of the discovered category URLs
     * @return {@code true} when the category menu was found and walked;
     *         {@code false} when the input is missing, the category menu
     *         cannot be located, or any exception occurs while parsing
     */
    @Override
    public Boolean parse(WebResult webResult, UrlFetcherEventProducer urlProducer) {
        if (webResult == null) {
            return false;
        }
        try {
            // The method signature uses the raw type, so the payload type
            // cannot be verified at compile time.
            @SuppressWarnings("unchecked")
            String html = ((WebResult<String>) webResult).getData();
            if (html == null) {
                return false;
            }

            Document doc = Jsoup.parse(html);
            doc.charset(Charset.forName("UTF-8"));

            // Locate the div that contains the book categories.
            Element categoryRoot = doc.body().getElementById("p-category");
            Element menu = categoryRoot == null
                    ? null
                    : categoryRoot.getElementsByClass("menu").first();
            if (menu == null) {
                System.err.println("JDBookCate2Parser: category menu (#p-category .menu) not found");
                return false;
            }

            for (Element item : menu.getElementsByClass("item")) {
                Element ext = item.getElementsByClass("ext").first();
                if (ext == null) {
                    // An item without sub-category links must not abort the
                    // remaining items.
                    continue;
                }
                for (Element anchor : ext.getElementsByTag("a")) {
                    String href = anchor.attr("href");
                    if (StringUtils.isNotEmpty(href)) {
                        urlProducer.sendData(new UrlResult(toAbsoluteUrl(href), new JDBookListParser()));
                    }
                }
            }

            return true;
        } catch (Exception e) {
            // Boundary catch: any unexpected failure is reported as "not parsed".
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Returns {@code href} unchanged when it already starts with
     * {@code http://} or {@code https://}; otherwise prefixes it with
     * {@code https:}.
     */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        return DEFAULT_SCHEME + href;
    }
}
